Preparation


In [ ]:
# Execute the "Game sessions" functions notebook inside this kernel.
# NOTE(review): the cells below use names that are not defined in this notebook
# (e.g. rmdf152, getUserSessions, isGUIDFormat, pd) - presumably provided by this %run; confirm.
%run "../Functions/2. Game sessions.ipynb"

In [ ]:
import unidecode

Tests

Tinkering


In [ ]:
accented_string = "Enormément"
# accented_string is a 'str' containing an accented (non-ASCII) character

unaccented_string = unidecode.unidecode(accented_string)
unaccented_string
# expected: unaccented_string is the 'str' 'Enormement' (accent transliterated to plain ASCII)

getUserSessions tinkering


In [ ]:
_rmDF = rmdf152
userId = 'e8fed737-7c65-49c8-bf84-f8ae71c094f8'
# alternative: userId = getRandomRedMetricsGUID(_rmdf)

# Prototype body of getUserSessions( _rmDF, userId ):
# select the sessionId values of the rows belonging to userId in one .loc call,
# then keep each session once and discard missing ids.
result = (
    _rmDF.loc[_rmDF['userId'] == userId, 'sessionId']
    .drop_duplicates()
    .dropna(how='any')
)
result

In [ ]:
# Pick one session id at random from `result` (the Series built in the previous cell).
# random.randint includes both bounds, hence the len(result)-1 upper limit.
# NOTE(review): `randint` is only imported in a later cell of this notebook
# ("from random import randint") - presumably the %run'd notebook also provides it; confirm.
_sessionIndex = randint(0,len(result)-1)
_guid = result.iloc[_sessionIndex]
_guid

In [ ]:
# Display the user id used for the session lookup above.
userId

getTranslatedForm tinkering


In [ ]:
# Display questionsAnswersTranslationsFR transposed.
questionsAnswersTranslationsFR.T

In [ ]:
# Display the entry stored under this question label.
questionsAnswersTranslationsFR.loc["Are you interested in video games?"]

In [ ]:
# Display the entry stored under this question label.
questionsAnswersTranslationsFR.loc["Do you play video games?"]

In [ ]:
localizedFormFR = gformFR

# Prototype body of getTranslatedForm( localizedFormFR ):
# returns an English-indexed, English-localized answer dataframe
#  from a French-indexed, French-localized dataframe.
# Works on a copy, so localizedFormFR itself is left unmodified.
result = localizedFormFR.copy()

# translate answers
for question in result.columns:
    # The translation entry depends only on the question, not on the row:
    # look it up once per column instead of several times per cell.
    translations = questionsAnswersTranslationsFR.loc[question]

    # Questions with an empty translation entry keep their answers as they are.
    if (0 == len(translations)):
        continue

    for index in result.index:
        answer = result.loc[index, question]
        if (answer in translations):
            result.loc[index, question] = translations[answer]
        else:
            # Report answers that have no translation, followed by three blank lines.
            print(question)
            #print(index)
            print(answer)
            print(translations)
            print()
            print()
            print()

# translate questions: French column labels are mapped positionally onto gformEN's labels
result = result.rename(columns=dict(zip(localizedFormFR.columns,gformEN.columns)))

result.T

In [ ]:
# Size of the translation entry stored under 'Timestamp'
# (the prototype above skips questions for which this length is 0).
len(questionsAnswersTranslationsFR.loc['Timestamp'])

In [ ]:
# Call getTranslatedForm and display the row at position 1 of its result.
# NOTE(review): it is called here with two arguments, while the commented prototype
# signature in this notebook takes only localizedFormFR - confirm the current signature.
getTranslatedForm( gformFR, questionsAnswersTranslationsFR ).iloc[1]

getRandomRedMetricsGUID tinkering


In [ ]:
from random import randint

# Prototype body of getRandomRedMetricsGUID: draw a random userId from rmdf152
# until one passes isGUIDFormat.
uniqueUsers = rmdf152['userId'].dropna().unique()
userCount = len(uniqueUsers)

# Without at least one GUID-formatted userId the rejection loop below would
# never terminate (or randint would fail on an empty range): fail fast instead.
if (not any(isGUIDFormat(candidate) for candidate in uniqueUsers)):
    raise ValueError("no GUID-formatted userId found in rmdf152")

# '0' is a placeholder used to enter the loop; the loop exits on the first valid draw.
testlocalplayerguid = '0'
while (not isGUIDFormat(testlocalplayerguid)):
    # randint includes both bounds, hence userCount-1
    userIndex = randint(0,userCount-1)
    testlocalplayerguid = uniqueUsers[userIndex]
testlocalplayerguid

In [ ]:
# Number of distinct sessionId values in rmdf152 (nunique ignores missing values by default).
sessionscount = rmdf152["sessionId"].nunique()
sessionscount

In [ ]:
# Distinct values found in the "customData.platform" column of rmdf152.
platforms = rmdf152["customData.platform"].unique()
platforms

print("part100="+str(part100.head(1))) print("part131="+str(part131.head(1))) print("part132="+str(part132.head(1))) print("part133="+str(part133.head(1))) print("part140="+str(part140.head(1))) print("part150="+str(part150.head(1))) print("part151="+str(part151.head(1))) print("part152="+str(part152.head(1))) print("df="+str(df.head(1)))


In [ ]:
# User id used by the following test cells.
# Note: the string itself contains double quotes around the GUID.
# NOTE(review): presumably this matches how some userId values are stored in rmdf152 - confirm.
testGUID = '"4dbc2f43-421c-4e23-85d4-f17723ff8c66"'

In [ ]:
# Count the sessions of testGUID in rmdf152.
# NOTE(review): an `includewithoutusers=True` option is described as counting sessions that
# do not have any userId attached; it is not passed in this call - confirm against
# getSessionsCount's actual signature.
getSessionsCount( rmdf152, testGUID)

print("part100="+str(part100.columns)) print("part131="+str(part131.columns)) print("part132="+str(part132.columns)) print("part133="+str(part133.columns)) print("part140="+str(part140.columns)) print("part150="+str(part150.columns)) print("part151="+str(part151.columns)) print("part152="+str(part152.columns))

print("dfconcat="+str(dfconcat.columns))

print("df="+str(df.columns))

df.columns


In [ ]:
# Sessions of testGUID as returned by getUserSessions.
sessionsList = getUserSessions(rmdf152, testGUID)
sessionsList

In [ ]:
# Manual variant of the session lookup, based on 'start' events only.
# Keep the rows whose type is 'start' ...
sessionsList = rmdf152[rmdf152['type']=='start']
# ... drop the now-constant 'type' column. The axis is named through `columns=`:
# passing it positionally (drop('type', 1)) is deprecated and rejected by pandas >= 2.0.
sessionsList = sessionsList.drop(columns='type')
# ... discard every row that has at least one missing value ...
sessionsList = sessionsList.dropna(how='any')
# ... and keep the rows belonging to testGUID.
userSessionsList = sessionsList[sessionsList['userId']==testGUID]
userSessionsList

In [ ]:
# Number of entries returned by getUserSessions for testGUID (first dimension of the result).
#print(testGUID)
sessionsList = getUserSessions(rmdf152, testGUID)
# alternative tried with the quotes stripped from the id:
#sessionsList = getAllSessions(rmdf152, testGUID.replace('"',''))
#print(type(sessionsList))
sessionsList.shape[0]

In [ ]:
# One row per distinct (userId, sessionId) pair found in rmdf152.
_userSessionColumns = ['userId', 'sessionId']
allSessions = rmdf152[_userSessionColumns].drop_duplicates()
allSessions.head()

In [ ]:
# Ten users with the most (userId, sessionId) pairs, largest count first.
(
    allSessions
    .groupby('userId')
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
    .head(10)
)

In [ ]:
#getUserSessionsCounts(getNormalizedRedMetricsCSV(part152)).head(10)

In [ ]:
# Same per-user count computed through agg(['count']); only the first rows are shown, unsorted.
allSessions.groupby('userId').agg(['count']).head() #.sort_values(by='sessionId', ascending=False).head(10)

In [ ]:
#df2 = pd.concat([df151, rmdf152])
#df2.head(2)
#print(df2.columns)
#df2columns = df2.columns.values
#type(df2columns)
#df2columns
#newColumns = np.concatenate((minimalInitializationColumns, df2columns))
#newColumns
#df2 = getNormalizedRedMetricsCSV(df)

getRandomSessionGUID tinkering


In [ ]:
# Call getRandomSessionGUID without a user id and display what it returns.
getRandomSessionGUID()

In [ ]:
# Compare the container types: unique userIds of rmdf152 vs. the result of getUserSessions.
# Note: this _userId literal embeds double quotes around the GUID.
_userId = '"e8fed737-7c65-49c8-bf84-f8ae71c094f8"'
type(rmdf152['userId'].dropna().unique()), type(getUserSessions( rmdf152, _userId ))

In [ ]:
# Same lookup with the GUID written without embedded double quotes.
_userId = 'e8fed737-7c65-49c8-bf84-f8ae71c094f8'
_uniqueSessions = getUserSessions( rmdf152, _userId )
# Only the last expression of a cell is displayed: print the count explicitly
# so it is not silently discarded.
print(len(_uniqueSessions))
_uniqueSessions

In [ ]:
#_userId = ''
_userId = '"e8fed737-7c65-49c8-bf84-f8ae71c094f8"'

# Prototype body of getRandomSessionGUID( _userId = '' ):
# pick a random session id of the given user, or of a random user when
# _userId does not pass isGUIDFormat.
rmId = _userId
if( not(isGUIDFormat(_userId))):
    rmId = getRandomRedMetricsGUID()

_uniqueSessions = getUserSessions( rmdf152, rmId )

_sessionsCount = len(_uniqueSessions)

# Defaults kept when the user has no session: randint(0, -1) would raise ValueError.
_guid = ""
_sessionIndex = None
if( _sessionsCount > 0 ):
    # randint includes both bounds, hence _sessionsCount-1
    _sessionIndex = randint(0,_sessionsCount-1)
    _guid = _uniqueSessions.iloc[_sessionIndex]
_guid

In [ ]:
# Display the user id that was actually used for the session lookup.
rmId

In [ ]:
# Display the randomly drawn position within _uniqueSessions.
_sessionIndex

In [ ]:
# Display the number of sessions found for rmId.
_sessionsCount

In [ ]:
# Boundary check: both bounds are inclusive, so randint(0,0) can only yield 0
# (this is the draw performed above when _sessionsCount is 1).
randint(0,0)

In [ ]:
# Display the sessions the random pick was drawn from.
_uniqueSessions

In [ ]:
# Call getRandomSessionGUID again without a user id and display what it returns.
getRandomSessionGUID()

getFirstEventDate tinkering


In [ ]:
# Prototype of getFirstEventDate: earliest 'userTime' among the events that
# carry a 'section' value, over all sessions of the user.
userId = testGUID

#print('----------------------uid='+str(uid)+'----------------------')
sessions = getUserSessions(rmdf152, userId)

# Far-future sentinel: any real event time is earlier. It is what gets printed
# when none of the user's sessions has an event with a 'section'.
firstGameTime = pd.to_datetime('2050-12-31T12:59:59.000Z', utc=True)

for session in sessions:
    #print('-----------------------------------------session='+str(session))
    timedEvents = rmdf152[rmdf152['sessionId']==session]
    # only events that have a 'section' value are considered
    timedEvents = timedEvents.dropna(subset=['section'])

    if(len(timedEvents) > 0):
        # Parse the times into a separate Series instead of writing a column back
        # into the filtered frame: assigning on a slice of rmdf152 triggers
        # pandas' SettingWithCopyWarning and is not needed to compute the minimum.
        userTimes = timedEvents['userTime'].map(lambda t: pd.to_datetime(t, utc=True))

        earliest = userTimes.min()
        if(earliest < firstGameTime):
            firstGameTime = earliest
    #else:
        #print('no event with section')
#print('-----------------------------------------')
print("firstGameTime=" + str(firstGameTime))

In [ ]:
# Display every row of rmdf152 whose userId equals the id used above.
rmdf152[rmdf152['userId']==userId]

In [ ]:
# Recompute and display the sessions of userId as returned by getUserSessions.
sessions = getUserSessions(rmdf152, userId)
sessions